library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.3 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Location of the World Cup matches CSV on GitHub.
myurl <- paste0(
  "https://raw.githubusercontent.com/",
  "reisanar/datasets/master/WorldCupMatches.csv"
)
# Read the CSV straight from the URL into a base data frame, then print it.
worldcup <- read.csv(file = myurl)
worldcup
Look at the world_bank_pop dataset
# Print the world_bank_pop dataset that ships with tidyr (attached by
# library(tidyverse)); it is a data object, not a function.
world_bank_pop
Look at the WorldPhones dataset
# Print the built-in WorldPhones table: one row per year, one column per region.
WorldPhones
## N.Amer Europe Asia S.Amer Oceania Africa Mid.Amer
## 1951 45939 21574 2876 1815 1646 89 555
## 1956 60423 29990 4708 2568 2366 1411 733
## 1957 64721 32510 5230 2695 2526 1546 773
## 1958 68484 35218 6662 2845 2691 1663 836
## 1959 71799 37598 6856 3000 2868 1769 911
## 1960 76036 40341 8220 3145 3054 1905 1008
## 1961 79831 43173 9053 3338 3224 2005 1076
Take a glimpse of worldcup
# Compact overview of worldcup: row/column counts plus each column's type
# and first few values.
worldcup %>%
  glimpse()
## Rows: 4,572
## Columns: 20
## $ Year <int> 1930, 1930, 1930, 1930, 1930, 1930, 1930, 1930, 1~
## $ Datetime <chr> "13 Jul 1930 - 15:00 ", "13 Jul 1930 - 15:00 ", "~
## $ Stage <chr> "Group 1", "Group 4", "Group 2", "Group 3", "Grou~
## $ Stadium <chr> "Pocitos", "Parque Central", "Parque Central", "P~
## $ City <chr> "Montevideo ", "Montevideo ", "Montevideo ", "Mon~
## $ Home.Team.Name <chr> "France", "USA", "Yugoslavia", "Romania", "Argent~
## $ Home.Team.Goals <int> 4, 3, 2, 3, 1, 3, 4, 3, 1, 1, 6, 4, 1, 4, 3, 6, 6~
## $ Away.Team.Goals <int> 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 1, 1~
## $ Away.Team.Name <chr> "Mexico", "Belgium", "Brazil", "Peru", "France", ~
## $ Win.conditions <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ",~
## $ Attendance <int> 4444, 18346, 24059, 2549, 23409, 9249, 18306, 183~
## $ Half.time.Home.Goals <int> 3, 2, 2, 1, 0, 1, 0, 2, 0, 0, 3, 1, 1, 4, 2, 1, 3~
## $ Half.time.Away.Goals <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1~
## $ Referee <chr> "LOMBARDI Domingo (URU)", "MACIAS Jose (ARG)", "T~
## $ Assistant.1 <chr> "CRISTOPHE Henry (BEL)", "MATEUCCI Francisco (URU~
## $ Assistant.2 <chr> "REGO Gilberto (BRA)", "WARNKEN Alberto (CHI)", "~
## $ RoundID <int> 201, 201, 201, 201, 201, 201, 201, 201, 201, 201,~
## $ MatchID <int> 1096, 1090, 1093, 1098, 1085, 1095, 1092, 1097, 1~
## $ Home.Team.Initials <chr> "FRA", "USA", "YUG", "ROU", "ARG", "CHI", "YUG", ~
## $ Away.Team.Initials <chr> "MEX", "BEL", "BRA", "PER", "FRA", "MEX", "BOL", ~
Arrange world cup in Year, Datetime, Stage, Stadium
# Sort ascending by Year, breaking ties by Datetime, then Stage, then Stadium.
worldcup %>%
  arrange(Year, Datetime, Stage, Stadium)
arrange Home.Team.Goals and year in descending order
# Sort by Home.Team.Goals, then Year, both descending.
# desc() accepts a single vector, so each sort key needs its own desc() call;
# desc(Home.Team.Goals, Year) fails with an "unused argument" error.
arrange(worldcup, desc(Home.Team.Goals), desc(Year))
Select everything that ends with Goals
# Keep every column whose name ends with "Goals".
# ends_with("Goals") already matches Home.Team.Goals, Away.Team.Goals,
# Half.time.Home.Goals and Half.time.Away.Goals in that order, so the extra
# range and explicit half-time selectors were redundant.
worldcup %>%
  select(ends_with("Goals"))
Create a data frame called worldcup_data that holds only the goal columns
# Store the four goal columns (full-time and half-time, home and away) as
# worldcup_data. ends_with("Goals") alone selects all four in column order,
# so the redundant range and explicit selectors are dropped.
worldcup_data <- worldcup %>%
  select(ends_with("Goals"))
# Print the resulting data frame.
worldcup_data
Use mutate() to add two columns: Total_fh_goals, the sum of Home.Team.Goals and Away.Team.Goals, and Total_ht_goals, the sum of Half.time.Home.Goals and Half.time.Away.Goals
# Append per-match goal totals: full-time (home + away) and half-time
# (home + away). The result is printed only; worldcup_data is not modified.
worldcup_data %>%
  mutate(
    Total_fh_goals = Home.Team.Goals + Away.Team.Goals,
    Total_ht_goals = Half.time.Home.Goals + Half.time.Away.Goals
  )
Find the mean of Home.Team.Goals
# Overall mean of Home.Team.Goals, ignoring missing values.
worldcup %>%
  summarise(hometeamgoals = mean(Home.Team.Goals, na.rm = TRUE))
Group by Year, then compute the row count and the average for each group
# Per-Year summary: number of rows and the mean of Year within each group.
# NOTE(review): avg averages the grouping column itself, so it simply repeats
# Year for every group -- confirm whether a goals or attendance column was
# intended here.
by_year <- group_by(worldcup, Year)
summarize(by_year, count = n(), avg = mean(Year, na.rm = TRUE))
List only the first 10 rows of the dataset
# First ten rows of worldcup.
worldcup %>%
  head(n = 10)
Filter all the games where Argentina is the home team
# Rows where Argentina is listed as the home team.
worldcup %>%
  filter(Home.Team.Name == "Argentina")
Filter all the games where Argentina is the away team
# argen1: rows where Argentina is listed as the away team.
argen1 <- filter(worldcup, Away.Team.Name == "Argentina")
argen1
Create a geom_point() to see the Goals scored each year for the Away.Team.Goals for Argentina
# Scatter plot of Away.Team.Goals against Year for Argentina's away rows.
ggplot(argen1, aes(x = Year, y = Away.Team.Goals)) +
  geom_point()
Create a data frame named argen that contains only Argentina's home games
# argen: rows where Argentina is listed as the home team.
argen <- filter(worldcup, Home.Team.Name == "Argentina")
argen
Filter all attendance greater than 67800
# Rows with Attendance strictly above 67800.
filter(worldcup, Attendance > 67800)
Arrange to show the most Home.Team.Goals
# Sort so the rows with the most Home.Team.Goals come first.
worldcup %>%
  arrange(desc(Home.Team.Goals))
make a geom_point() with x being Year and y being Home.Team.Goals
# Scatter plot of Home.Team.Goals against Year for Argentina's home rows.
ggplot(argen, aes(x = Year, y = Home.Team.Goals)) +
  geom_point()
filter all Home.Team.Name for Portugal
# portg: rows where Portugal is listed as the home team.
portg <- filter(worldcup, Home.Team.Name == "Portugal")
portg
Filter all Away.Team.Name for Portugal
# portg1: rows where Portugal is listed as the away team.
portg1 <- filter(worldcup, Away.Team.Name == "Portugal")
portg1
Make a geom_point() from the portg1 data frame with x being Year and y being Away.Team.Goals
# Scatter plot of Away.Team.Goals against Year for Portugal's away rows.
ggplot(portg1, aes(x = Year, y = Away.Team.Goals)) +
  geom_point()
Filter just the
Old Trafford Stadium
# stad: rows whose Stadium is exactly "Old Trafford Stadium".
stad <- filter(worldcup, Stadium == "Old Trafford Stadium")
stad
Create a boxplot with x being Stadium and y being Attendance
# Box plot of Attendance for the filtered stadium rows.
ggplot(stad, aes(x = Stadium, y = Attendance)) +
  geom_boxplot()
# Rows where Argentina is the home team (each row is a match record, despite
# the "players" in the variable name).
all_arg_players <- filter(worldcup, Home.Team.Name == "Argentina")
all_arg_players
# Rows where Portugal is the home team.
all_por_players <- filter(worldcup, Home.Team.Name == "Portugal")
all_por_players
# Argentina's home rows from 1990 only.
filter(worldcup, Year == 1990 & Home.Team.Name == "Argentina")
create a geom_point() x being Year and y being Home.Team.Goals
# Scatter plot of Home.Team.Goals against Year for Portugal's home rows.
ggplot(portg, aes(x = Year, y = Home.Team.Goals)) +
  geom_point()
Filter
RoundID > 300 and Away.Team.Initials == "ARG"
# Rows with RoundID above 300 where Argentina is the away team.
# RoundID is an integer column, so it must be compared with the number 300:
# the quoted "300" coerces RoundID to character and compares the values as
# strings (e.g. "1014" > "300" is FALSE), silently dropping valid rows.
worldcup %>%
  filter(RoundID > 300, Away.Team.Initials == "ARG")
# Same numeric RoundID filter with Portugal as the away team.
worldcup %>%
  filter(RoundID > 300, Away.Team.Initials == "POR")
Create a geom_point() with x being Year and y being Attendance
# Scatter plot of Attendance against Year for every row of worldcup.
ggplot(worldcup, aes(x = Year, y = Attendance)) +
  geom_point()
## Warning: Removed 3722 rows containing missing values (geom_point).
Filter
Year > 1996 and RoundID > 6000
# Rows after 1996 whose RoundID is also above 6000 (both conditions must hold).
filter(worldcup, Year > 1996 & RoundID > 6000)
Create a geom_point() with City mapped to color, Home.Team.Goals mapped to size, and alpha = 0.9
# Attendance against Year, with point color mapped to City and point size
# mapped to Home.Team.Goals; alpha is a fixed setting, not a mapping.
ggplot(
  worldcup,
  aes(x = Year, y = Attendance, color = City, size = Home.Team.Goals)
) +
  geom_point(alpha = 0.9)
## Warning: Removed 3722 rows containing missing values (geom_point).
Which team had the most Home Team Goals?
# Order rows from most to fewest Home.Team.Goals; the top row answers the
# question.
arrange(worldcup, desc(Home.Team.Goals))
Which team had the most Away Team Goals?
# Order rows from most to fewest Away.Team.Goals; the top row answers the
# question.
arrange(worldcup, desc(Away.Team.Goals))
Make a bar graph for Year
# Bar chart counting the rows recorded for each Year.
ggplot(worldcup, aes(x = Year)) +
  geom_bar()
## Warning: Removed 3720 rows containing non-finite values (stat_count).
Count the number of rows for each Referee
# One output row per distinct Referee value, with n = how many rows it has.
count(worldcup, Referee)
Create a histogram for the Attendance
# Histogram of Attendance using ggplot2's default binning.
ggplot(worldcup, aes(x = Attendance)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3722 rows containing non-finite values (stat_bin).
Use a dark green color to draw a geom_point
# Attendance against Year with every point drawn in dark green (a fixed
# color set outside aes(), so no legend is produced).
ggplot(data = worldcup) +
  geom_point(aes(x = Year, y = Attendance), color = "darkgreen")
## Warning: Removed 3722 rows containing missing values (geom_point).
Set the title to "Home Team goals" and draw a histogram of Home.Team.Goals
# Histogram of Home.Team.Goals: green bars with blue outlines, a custom title
# and x-axis label, and the x-axis limited to the range 0-10.
ggplot(data = worldcup, mapping = aes(x = Home.Team.Goals)) +
  geom_histogram(fill = "green", color = "blue") +
  labs(title = "Home Team goals", x = "Goals") +
  xlim(0, 10)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3720 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).
Create a scatter plot of Half.time.Home.Goals against Home.Team.Goals
# Scatter plot with full-time home goals on x and half-time home goals on y.
ggplot(data = worldcup) +
  geom_point(aes(x = Home.Team.Goals, y = Half.time.Home.Goals))
## Warning: Removed 3720 rows containing missing values (geom_point).
Select all columns except Referee
# Keep every column of worldcup except Referee.
select(worldcup, -Referee)